Decontam

Paths and libraries setting

# Load main packages, paths and custom functions
source("../../../source/main_packages.R")
source("../../../source/paths.R")
source("../../../source/functions.R")

# Load supplementary packages
packages <- c("decontam", "kableExtra", "microbiome", "cowplot")
invisible(lapply(packages, require, character.only = TRUE))

Import MED phyloseq object

setwd(path_rdata)
ps <- readRDS("MED_phyloseq.rds")

Decontam

Detect contaminants

setwd(path_tsv)

# set threshold
threshold=0.6
  
# Preprocess
df <- as.data.frame(sample_data(ps)) # Put sample_data into a ggplot-friendly data.frame
#df$LibrarySize <- sample_sums(ps)
df <- df[order(df$Read_depth),]
df$Index <- seq(nrow(df))

# read depth 
p <- ggplot(data=df, aes(x=Index, y=Read_depth, color=Dna)) + geom_point()
p 

# convert Dna column into numeric
sample_data(ps)$Dna <- as.numeric(get_variable(ps, "Dna"))

# Identifier les contaminants - méthode de PREVALENCE
sample_data(ps)$is.neg <- sample_data(ps)$Control == "Control sample"
contamdf.prev <- isContaminant(ps, method="prevalence", neg="is.neg", threshold=threshold)

# table of positive and false contaminant
table(contamdf.prev$contaminant)
## 
## FALSE  TRUE 
##    67     3
# contaminant nodes
decontam_asv_MED  <- row.names(contamdf.prev[contamdf.prev$contaminant == TRUE, ])

# make phyloseq object of presence-absence in negative controls and true samples
ps.pa <- transform_sample_counts(ps, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Control == "Control sample", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Control == "True sample", ps.pa)

# make dataframe from phyloseq objects of presence-absence
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
                    contaminant=contamdf.prev$contaminant)

# plot
p2 <- ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
  xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")

p2

## Ecrire la table d'ASV contaminants détectés par prévalence
tax <- tax_table(ps) %>% as.matrix()
decontam_df_MED <- tax[row.names(tax) %in% decontam_asv_MED , ]

# print contaminants
decontam_df_MED   %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Kingdom Phylum Class Order Family Genus Species
N0005 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Moraxellaceae Enhydrobacter NA
N0315 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Yersiniaceae Rahnella1 NA
N0852 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
# histogram of the scores
p <- hist(contamdf.prev$p, 100, ylim = c(0,25), xlim = c(0,1), main="", xlab="p", ylab="Frequency")

Save contaminant table

Remove contaminants from the phyloseq object

# remove contaminants ASV
alltaxa <- taxa_names(ps)
decontam_taxa <- alltaxa[!(alltaxa %in% decontam_asv_MED)]
ps2 <- prune_taxa(decontam_taxa, ps)

# check ps 
ps2 <- check_ps(ps2)
ps2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 123 samples ]
## sample_data() Sample Data:       [ 123 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps2)
## [1] 6469784
compute_read_counts(ps)-compute_read_counts(ps2)
## [1] 17460
ps <- ps2

Remove blanks

# blanks
ps.blanks <- subset_samples(ps, Strain=="Blank")
ps.blanks <- check_ps(ps.blanks)
ps.blanks
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 24 taxa and 8 samples ]
## sample_data() Sample Data:       [ 8 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 24 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 24 reference sequences ]
compute_read_counts(ps.blanks)
## [1] 16211
# supprimer blanks
ps.filter <- subset_samples(ps, Strain!="Blank")
ps.filter <- check_ps(ps.filter)
ps.filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 115 samples ]
## sample_data() Sample Data:       [ 115 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps.filter)
## [1] 6453573
# check nreads
summarize_phyloseq(ps.filter)
## Compositional = NO2
## 1] Min. number of reads = 952] Max. number of reads = 7361593] Total number of reads = 64535734] Average number of reads = 56118.02608695655] Median number of reads = 251717] Sparsity = 0.6486696950032456] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 17SampleWellStrainFieldCountryOrganSpeciesIndividualIndividualsDateRunControlDnaSpecies_italicStrain_italicRead_depthis.neg2
## [[1]]
## [1] "1] Min. number of reads = 95"
## 
## [[2]]
## [1] "2] Max. number of reads = 736159"
## 
## [[3]]
## [1] "3] Total number of reads = 6453573"
## 
## [[4]]
## [1] "4] Average number of reads = 56118.0260869565"
## 
## [[5]]
## [1] "5] Median number of reads = 25171"
## 
## [[6]]
## [1] "7] Sparsity = 0.648669695003245"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 17"
## 
## [[11]]
##  [1] "Sample"         "Well"           "Strain"         "Field"         
##  [5] "Country"        "Organ"          "Species"        "Individual"    
##  [9] "Individuals"    "Date"           "Run"            "Control"       
## [13] "Dna"            "Species_italic" "Strain_italic"  "Read_depth"    
## [17] "is.neg"

Counts

Create dataframe

x <- c("Culex pipiens", "Field - Bosc", "Field - Camping Europe", "Laboratory - Lavar", 
       "Culex quinquefasciatus", "Field - Guadeloupe", "Laboratory - Slab TC (Wolbachia -)",
       "Aedes aegypti (Guadeloupe)",
       "Total")
y <- c("Reads", "Oligotypes", "Samples")

df <- data.frame(matrix(ncol=3, nrow=9))
rownames(df) <- x
colnames(df) <- y

df2 <- df
df3 <- df

Whole + ovary

# Culex pipiens
ps.pipiens <- subset_samples(ps.filter, Species=="Culex pipiens")
ps.pipiens <- check_ps(ps.pipiens)

## All Strain
### oligotype
nrow(ps.pipiens@otu_table) -> df["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data) -> df["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens) -> df["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Field - Bosc")) -> df["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Field - Camping Europe")) -> df["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Laboratory - Lavar")) -> df["Laboratory - Lavar", "Reads"]


# Culex quinquefasciatus
ps.quinque <- subset_samples(ps.filter, Species=="Culex quinquefasciatus")
ps.quinque <- check_ps(ps.quinque)

## All Strain
### oligotype
nrow(ps.quinque@otu_table) -> df["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data) -> df["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque) -> df["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
                 check_ps())) -> df["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df["Laboratory - Slab TC (Wolbachia -)", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df["Laboratory - Slab TC (Wolbachia -)", "Reads"]


# Aedes aegypti
ps.aedes <- subset_samples(ps.filter, Species=="Aedes aegypti")
ps.aedes <- check_ps(ps.aedes)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Aedes aegypti (Guadeloupe)", "Reads"]

# Total
### oligotype
nrow(ps.filter@otu_table) -> df["Total", "Oligotypes"]
### samples
nrow(ps.filter@sam_data) -> df["Total", "Samples"]
### reads
compute_read_counts(ps.filter) -> df["Total", "Reads"]

df %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 2574603 49 83
Field - Bosc 803574 36 23
Field - Camping Europe 869617 39 25
Laboratory - Lavar 901412 48 35
Culex quinquefasciatus 1925054 59 21
Field - Guadeloupe 1721053 47 7
Laboratory - Slab TC (Wolbachia -) 204001 45 14
Aedes aegypti (Guadeloupe) 1953916 54 11
Total 6453573 67 115

Whole

# Culex pipiens
ps.pipiens.whole <- ps.pipiens %>% subset_samples(Organ=="Whole")
ps.pipiens.whole <- ps.pipiens.whole %>% check_ps()

## All Strain
### oligotype
nrow(ps.pipiens.whole@otu_table) -> df2["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data) -> df2["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole) -> df2["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df2["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df2["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Field - Bosc")) -> df2["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df2["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df2["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Field - Camping Europe")) -> df2["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df2["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df2["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Laboratory - Lavar")) -> df2["Laboratory - Lavar", "Reads"]



# Culex quinquefasciatus
ps.quinque.whole <- subset_samples(ps.quinque, Organ=="Whole")
ps.quinque.whole <- check_ps(ps.quinque.whole)

## All Strain
### oligotype
nrow(ps.quinque.whole@otu_table) -> df2["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data) -> df2["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.whole) -> df2["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df2["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
                 check_ps())) -> df2["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df2["Laboratory - Slab TC (Wolbachia -)", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df2["Laboratory - Slab TC (Wolbachia -)", "Reads"]



# Aedes aegypti
ps.aedes.whole <- subset_samples(ps.aedes, Organ=="Whole")
ps.aedes.whole <- check_ps(ps.aedes.whole)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.whole %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df2["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes.whole@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes.whole %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Aedes aegypti (Guadeloupe)", "Reads"]


# Total
ps.whole <- ps.filter %>% subset_samples(Organ=="Whole")
ps.whole <- ps.whole %>% check_ps()
### oligotype
nrow(ps.whole@otu_table) -> df2["Total", "Oligotypes"]
### samples
nrow(ps.whole@sam_data) -> df2["Total", "Samples"]
### reads
compute_read_counts(ps.whole) -> df2["Total", "Reads"]

df2 %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 1251766 45 42
Field - Bosc 545790 33 13
Field - Camping Europe 98804 34 7
Laboratory - Lavar 607172 45 22
Culex quinquefasciatus 205761 57 18
Field - Guadeloupe 1760 38 4
Laboratory - Slab TC (Wolbachia -) 204001 45 14
Aedes aegypti (Guadeloupe) 1945807 54 9
Total 3403334 67 69

Ovary

# Culex pipiens
ps.pipiens.ovary <- ps.pipiens %>% subset_samples(Organ=="Ovary")
ps.pipiens.ovary <- ps.pipiens.ovary %>% check_ps()

## All Strain
### oligotype
nrow(ps.pipiens.ovary@otu_table) -> df3["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data) -> df3["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary) -> df3["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df3["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df3["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Field - Bosc")) -> df3["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df3["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df3["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Field - Camping Europe")) -> df3["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df3["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df3["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Laboratory - Lavar")) -> df3["Laboratory - Lavar", "Reads"]



# Culex quinquefasciatus
ps.quinque.ovary <- subset_samples(ps.quinque, Organ=="Ovary")
ps.quinque.ovary <- check_ps(ps.quinque.ovary)

## All Strain
### oligotype
nrow(ps.quinque.ovary@otu_table) -> df3["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data) -> df3["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary) -> df3["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.ovary %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df3["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
# nrow(otu_table(ps.quinque.ovary %>% 
#                  subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
#                  check_ps())) -> df3["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
# nrow(ps.quinque.ovary@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df3["Laboratory - Slab TC (Wolbachia -)", "Samples"]
# ### reads
# compute_read_counts(ps.quinque.ovary %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df3["Laboratory - Slab TC (Wolbachia -)", "Reads"]



# Aedes aegypti
ps.aedes.ovary <- subset_samples(ps.aedes, Organ=="Ovary")
ps.aedes.ovary <- check_ps(ps.aedes.ovary)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.ovary %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df3["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes.ovary@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes.ovary %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Aedes aegypti (Guadeloupe)", "Reads"]


# Total
ps.ovary <- ps.filter %>% subset_samples(Organ=="Ovary")
ps.ovary <- ps.ovary %>% check_ps()
### oligotype
nrow(ps.ovary@otu_table) -> df3["Total", "Oligotypes"]
### samples
nrow(ps.ovary@sam_data) -> df3["Total", "Samples"]
### reads
compute_read_counts(ps.ovary) -> df3["Total", "Reads"]

df3[is.na(df3)] <- 0

df3 %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 1322837 34 41
Field - Bosc 257784 26 10
Field - Camping Europe 770813 27 18
Laboratory - Lavar 294240 32 13
Culex quinquefasciatus 1719293 26 3
Field - Guadeloupe 1719293 26 3
Laboratory - Slab TC (Wolbachia -) 0 0 0
Aedes aegypti (Guadeloupe) 8109 30 2
Total 3050239 48 46

Plot of number of reads by sample

sample_data(ps.filter)$Read_depth <- sample_sums(ps.filter)
metadata_read <- data.frame(ps.filter@sam_data)

p <- ggplot(metadata_read, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("") + 
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=11), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 12)) +
 facet_wrap(~Species_italic+Strain_italic+Organ, scales = "free_x", ncol=4, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 550000)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=2, hjust=-0.25, vjust=0.25, angle=90)+
  guides(fill=guide_legend(title="Oligotype", label.theme = element_text(size = 10, face = "italic", colour = "Black", angle = 0)))

p

Filter samples

Remove sample with number of reads <1000

a <- prune_samples(sample_sums(ps.filter)<=1000, ps.filter)
test <- data.frame(a@sam_data)
test <- test[test$Sample!="NP20" & test$Sample!="NP29" & test$Sample!="NP30" & test$Sample!="NP34" & test$Sample!="NP36",]
toremove <- test$Sample

ps.filter <- subset_samples(ps.filter, !(Sample %in% toremove))
ps.filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 113 samples ]
## sample_data() Sample Data:       [ 113 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps.filter)
## [1] 6452623

Plot of final number of reads by sample

sample_data(ps.filter)$Read_depth <- sample_sums(ps.filter)
metadata_read <- data.frame(ps.filter@sam_data)
metadata_read_whole <- metadata_read[metadata_read$Organ=="Whole",]
metadata_read_ovary <- metadata_read[metadata_read$Organ=="Ovary",]


guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 10, face = "italic", colour = "Black", angle = 0)))

p <- ggplot(metadata_read_whole, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust=1, size=15)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=16), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 16),
        axis.text = element_text(size = 15)) +
 facet_wrap(~Species_italic+Strain_italic+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 1000000)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=4, angle=90, hjust=-0.1, vjust=0.25)+
  guides(fill=guide_legend(title="Oligotype"))

p

p2 <- ggplot(metadata_read_ovary, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust=1, size=15)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=16), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 16),
        axis.text = element_text(size = 15)) +
 facet_wrap(~Species_italic+Strain_italic+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 1000000)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=4, angle=90, hjust=-0.1, vjust=0.25)+
  guides(fill=guide_legend(title="Oligotype"))

p2

# panels
p_group <- plot_grid(p+theme(legend.position="none"), 
          p2+theme(legend.position="none"), 
          nrow=2, 
          ncol=1)+
    draw_plot_label(c("A", "B"), c(0, 0), c(1, 0.5), size = 15)

legend_plot <- get_legend(p + theme(legend.position="bottom"))

p_counts <- plot_grid(p_group, legend_plot, nrow=2, ncol=1, rel_heights = c(1, .1))
p_counts

Plot of final number of reads by Genus

new_names_genus <- c("Wolbachia",
               "Asaia",
               "Legionella",
               "Elizabethkingia",
               "Chryseobacterium",
               "Erwinia",
               "Morganella",
               "Pseudomonas",
               "Delftia",
               "Methylobacterium-Methylorubrum",
               "Serratia",
               "Coetzeea",
               "NA"
)

col_genus <- c("Wolbachia"="#FEB24C",
               "Asaia"="#10E015",
               "Legionella"="#DE3F23",
               "Elizabethkingia"="#66A7ED",
               "Chryseobacterium"="#F899FF",
               "Erwinia"="#FFE352",
               "Morganella"="#F5E4D3",
               "Pseudomonas"="#DBF5F0",
               "Delftia"="#C7C5B7",
               "Methylobacterium-Methylorubrum"="blue",
               "Serratia"="#B136F5",
               "Coetzeea"="red",
               "NA"="grey")


p <- plot_bar(ps.filter, "Genus", "Abundance", "Genus")+
  geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")+
  scale_fill_manual(values = col_genus)+
  labs(x="Genus", y="Number of read") +
 facet_wrap(~sample_Species+Strain+Organ, scales = "free", ncol=3)
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
p

# ps.filter2 <- ps.filter
# p = plot_bar(ps.filter2, x="Genus", y="Abundance", fill="Genus", facet_grid=~sample_Species+Strain+Organ)
# p + geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")
# 
# levels(ps.filter2@sam_data$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
#                "Culex pipiens"=make.italic("Culex pipiens"),
#                "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))
# 
# levels(ps.filter2@sam_data$Strain) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"), "- (Slab TC)")))
# 
#       test <- dataframe[dataframe$sample_Species==selection,] %>% 
#         group_by({{variable}}, Genus) %>% 
#         summarise(rel_ab = sum(Abundance))
#       
#       
# p = plot_bar(ps.filter2, x="Genus", y="Abundance", fill="Genus")
# p1 <- p + 
#   geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")+
#   facet_wrap(~sample_Species+Strain+Organ, scales = "free", ncol=3, labeller=label_parsed)+
#     theme(legend.title = element_text(size = 20), 
#         legend.position="bottom",
#         legend.text = element_text(size=14),
#         panel.spacing.y=unit(1, "lines"),
#         panel.spacing.x=unit(0.8, "lines"),
#         panel.spacing=unit(0,"lines"),
#         strip.background=element_rect(color="grey30", fill="grey90"),
#         strip.text.x = element_text(size = 16),
#         #panel.border=element_rect(color="grey90"),
#         axis.ticks.x=element_blank(),
#         axis.text.y = element_text(size=18))+
#   #geom_text(aes(label=Abundance),position=position_dodge(width=0.9), size=4, angle=90, hjust=-0.1, vjust=0.25)+
#   #geom_text(aes(label = signif(Abundance, digits = 3)), nudge_y = 4)+
#   labs(x="Sample", y="Number of reads", fill="Genus")
# 
# setwd(path_plot)
# png("TEST.png", units="in", width=20, height=25, res=300)
# p1
# dev.off()
data <- p$data

labels = c("Wolbachia"=make.italic("Wolbachia"),
                      "Asaia"=make.italic("Asaia"),
                      "Legionella"=make.italic("Legionella"),
                      "Elizabethkingia"=make.italic("Elizabethkingia"),
                      "Chryseobacterium"=make.italic("Chryseobacterium"),
                      "Erwinia"=make.italic("Erwinia"),
                      "Morganella"=make.italic("Morganella"),
                      "Pseudomonas"=make.italic("Pseudomonas"),
                      "Delftia"=make.italic("Delftia"),
                      "Methylobacterium-Methylorubrum"=make.italic("Methylobacterium-Methylorubrum"),
                      "Serratia"=make.italic("Serratia"),
                      "Coetzeea"=make.italic("Coetzeea"),
                      "NA"
)


data_pipiens_whole <- data[data$sample_Species=="Culex pipiens" & data$Organ=="Whole",]
data_pipiens_ovary <- data[data$sample_Species=="Culex pipiens" & data$Organ=="Ovary",]

data_quinque_whole <- data[data$sample_Species=="Culex quinquefasciatus" & data$Organ=="Whole",]
data_quinque_ovary <- data[data$sample_Species=="Culex quinquefasciatus" & data$Organ=="Ovary",]

data_aedes_whole <- data[data$sample_Species=="Aedes aegypti" & data$Organ=="Whole",]
data_aedes_ovary <- data[data$sample_Species=="Aedes aegypti" & data$Organ=="Ovary",]

# pipiens
## whole
### all
pipiens1 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=sample_Species, selection="Culex pipiens", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle= "Culex pipiens - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
### field
pipiens2 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Strain, selection="Field - Bosc", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex pipiens - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'Strain' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.
## Warning: Unknown or uninitialised column: `Organ`.
pipiens3 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Strain, selection="Field - Camping Europe", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex pipiens - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'Strain' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
### labo
pipiens4 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Strain, selection="Laboratory - Lavar", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex pipiens - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'Strain' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
## ovary
## all
pipiens5 <- percent_taxonomic_plot_test(data=data_pipiens_ovary, variable=sample_Species, selection="Culex pipiens", group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex pipiens - Ovary")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
# quinque
## whole
### all
quinque1 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=sample_Species, selection="Culex quinquefasciatus", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex quinquefasciatus - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
### field
quinque2 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=Strain, selection="Field - Guadeloupe", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex quinquefasciatus - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'Strain' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
### labo
quinque3 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=Strain, selection="Laboratory - Slab TC (Wolbachia -)",group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex quinquefasciatus - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'Strain' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
quinque3[["labels"]][["title"]] <- expression(paste(italic("Wolbachia"), "- (Slab TC)"))

## ovary
## all
quinque4 <- percent_taxonomic_plot_test(data=data_quinque_ovary, variable=sample_Species, selection="Culex quinquefasciatus",group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Culex quinquefasciatus - Ovary")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
# aedes
## whole
### all
aedes1 <- percent_taxonomic_plot_test(data=data_aedes_whole, variable=sample_Species, selection="Aedes aegypti",group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Aedes aegypti - Whole")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
## ovary
## all
aedes2 <- percent_taxonomic_plot_test(data=data_aedes_ovary, variable=sample_Species, selection="Aedes aegypti", group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(subtitle = "Aedes aegypti - Ovary")+
  theme(plot.tag.position = "topright",
        plot.subtitle=element_text(size=10, face="italic", color="black"))
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
p_genus <- plot_grid(pipiens1+ theme(legend.position="none"), 
                     pipiens2+ theme(legend.position="none"), 
                     pipiens3+ theme(legend.position="none"), 
                     pipiens4+ theme(legend.position="none"), 
                     pipiens5+ theme(legend.position="none"),
                     quinque1+ theme(legend.position="none"), 
                     quinque2+ theme(legend.position="none"), 
                     quinque3+ theme(legend.position="none"), 
                     quinque4+ theme(legend.position="none"), 
                     plot.new(),
                     aedes1+ theme(legend.position="none"), 
                     aedes2+ theme(legend.position="none"), 
                     plot.new(),
                     plot.new(),
                     plot.new(),
                     nrow=3, 
                     ncol=5)

# +
#   draw_plot_label(c("A", "B", "C"), c(0, 0, 0), c(1, 2/3, 1/3), size = 15)

p_genus

Counts after removing samples

Create a dataframe

x <- c("Culex pipiens", "Field - Bosc", "Field - Camping Europe", "Laboratory - Lavar", 
       "Culex quinquefasciatus", "Field - Guadeloupe", "Laboratory - Slab TC (Wolbachia -)",
       "Aedes aegypti (Guadeloupe)",
       "Total")
y <- c("Reads", "Oligotypes", "Samples")

df <- data.frame(matrix(ncol=3, nrow=9))
rownames(df) <- x
colnames(df) <- y

df2 <- df
df3 <- df

Whole + Ovary

# Culex pipiens
ps.pipiens <- subset_samples(ps.filter, Species=="Culex pipiens")
ps.pipiens <- check_ps(ps.pipiens)

## All Strain
### oligotype
nrow(ps.pipiens@otu_table) -> df["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data) -> df["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens) -> df["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Field - Bosc")) -> df["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Field - Camping Europe")) -> df["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Strain=="Laboratory - Lavar")) -> df["Laboratory - Lavar", "Reads"]


# Culex quinquefasciatus
ps.quinque <- subset_samples(ps.filter, Species=="Culex quinquefasciatus")
ps.quinque <- check_ps(ps.quinque)

## All Strain
### oligotype
nrow(ps.quinque@otu_table) -> df["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data) -> df["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque) -> df["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
                 check_ps())) -> df["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df["Laboratory - Slab TC (Wolbachia -)", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df["Laboratory - Slab TC (Wolbachia -)", "Reads"]


# Aedes aegypti
ps.aedes <- subset_samples(ps.filter, Species=="Aedes aegypti")
ps.aedes <- check_ps(ps.aedes)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes %>% subset_samples(Strain=="Field - Guadeloupe")) -> df["Aedes aegypti (Guadeloupe)", "Reads"]

# Total
### oligotype
nrow(ps.filter@otu_table) -> df["Total", "Oligotypes"]
### samples
nrow(ps.filter@sam_data) -> df["Total", "Samples"]
### reads
compute_read_counts(ps.filter) -> df["Total", "Reads"]

df %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 2574050 49 82
Field - Bosc 803574 36 23
Field - Camping Europe 869064 38 24
Laboratory - Lavar 901412 48 35
Culex quinquefasciatus 1924657 59 20
Field - Guadeloupe 1721053 47 7
Laboratory - Slab TC (Wolbachia -) 203604 45 13
Aedes aegypti (Guadeloupe) 1953916 54 11
Total 6452623 67 113

Whole

# Culex pipiens
ps.pipiens.whole <- ps.pipiens %>% subset_samples(Organ=="Whole")
ps.pipiens.whole <- ps.pipiens.whole %>% check_ps()

## All Strain
### oligotype
nrow(ps.pipiens.whole@otu_table) -> df2["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data) -> df2["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole) -> df2["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df2["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df2["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Field - Bosc")) -> df2["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df2["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df2["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Field - Camping Europe")) -> df2["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df2["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df2["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Strain=="Laboratory - Lavar")) -> df2["Laboratory - Lavar", "Reads"]



# Culex quinquefasciatus
ps.quinque.whole <- subset_samples(ps.quinque, Organ=="Whole")
ps.quinque.whole <- check_ps(ps.quinque.whole)

## All Strain
### oligotype
nrow(ps.quinque.whole@otu_table) -> df2["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data) -> df2["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.whole) -> df2["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df2["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
                 check_ps())) -> df2["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df2["Laboratory - Slab TC (Wolbachia -)", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df2["Laboratory - Slab TC (Wolbachia -)", "Reads"]



# Aedes aegypti
ps.aedes.whole <- subset_samples(ps.aedes, Organ=="Whole")
ps.aedes.whole <- check_ps(ps.aedes.whole)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.whole %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df2["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes.whole@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes.whole %>% subset_samples(Strain=="Field - Guadeloupe")) -> df2["Aedes aegypti (Guadeloupe)", "Reads"]


# Total
ps.whole <- ps.filter %>% subset_samples(Organ=="Whole")
ps.whole <- ps.whole %>% check_ps()
### oligotype
nrow(ps.whole@otu_table) -> df2["Total", "Oligotypes"]
### samples
nrow(ps.whole@sam_data) -> df2["Total", "Samples"]
### reads
compute_read_counts(ps.whole) -> df2["Total", "Reads"]

df2 %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 1251213 45 41
Field - Bosc 545790 33 13
Field - Camping Europe 98251 33 6
Laboratory - Lavar 607172 45 22
Culex quinquefasciatus 205364 57 17
Field - Guadeloupe 1760 38 4
Laboratory - Slab TC (Wolbachia -) 203604 45 13
Aedes aegypti (Guadeloupe) 1945807 54 9
Total 3402384 67 67

Ovary

# Culex pipiens
ps.pipiens.ovary <- ps.pipiens %>% subset_samples(Organ=="Ovary")
ps.pipiens.ovary <- ps.pipiens.ovary %>% check_ps()

## All Strain
### oligotype
nrow(ps.pipiens.ovary@otu_table) -> df3["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data) -> df3["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary) -> df3["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Field - Bosc") %>% 
                 check_ps())) -> df3["Field - Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Field - Bosc")) -> df3["Field - Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Field - Bosc")) -> df3["Field - Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Field - Camping Europe") %>% 
                 check_ps())) -> df3["Field - Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Field - Camping Europe")) -> df3["Field - Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Field - Camping Europe")) -> df3["Field - Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Strain=="Laboratory - Lavar") %>% 
                 check_ps())) -> df3["Laboratory - Lavar", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Strain=="Laboratory - Lavar")) -> df3["Laboratory - Lavar", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Strain=="Laboratory - Lavar")) -> df3["Laboratory - Lavar", "Reads"]



# Culex quinquefasciatus
ps.quinque.ovary <- subset_samples(ps.quinque, Organ=="Ovary")
ps.quinque.ovary <- check_ps(ps.quinque.ovary)

## All Strain
### oligotype
nrow(ps.quinque.ovary@otu_table) -> df3["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data) -> df3["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary) -> df3["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.ovary %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df3["Field - Guadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Field - Guadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Field - Guadeloupe", "Reads"]


## Wolbachia -
### oligotype
# nrow(otu_table(ps.quinque.ovary %>% 
#                  subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)") %>% 
#                  check_ps())) -> df3["Laboratory - Slab TC (Wolbachia -)", "Oligotypes"]
### samples
# nrow(ps.quinque.ovary@sam_data %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df3["Laboratory - Slab TC (Wolbachia -)", "Samples"]
# ### reads
# compute_read_counts(ps.quinque.ovary %>% subset_samples(Strain=="Laboratory - Slab TC (Wolbachia -)")) -> df3["Laboratory - Slab TC (Wolbachia -)", "Reads"]



# Aedes aegypti
ps.aedes.ovary <- subset_samples(ps.aedes, Organ=="Ovary")
ps.aedes.ovary <- check_ps(ps.aedes.ovary)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.ovary %>% 
                 subset_samples(Strain=="Field - Guadeloupe") %>% 
                 check_ps())) -> df3["Aedes aegypti (Guadeloupe)", "Oligotypes"]
### samples
nrow(ps.aedes.ovary@sam_data %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Aedes aegypti (Guadeloupe)", "Samples"]
### reads
compute_read_counts(ps.aedes.ovary %>% subset_samples(Strain=="Field - Guadeloupe")) -> df3["Aedes aegypti (Guadeloupe)", "Reads"]


# Total
ps.ovary <- ps.filter %>% subset_samples(Organ=="Ovary")
ps.ovary <- ps.ovary %>% check_ps()
### oligotype
nrow(ps.ovary@otu_table) -> df3["Total", "Oligotypes"]
### samples
nrow(ps.ovary@sam_data) -> df3["Total", "Samples"]
### reads
compute_read_counts(ps.ovary) -> df3["Total", "Reads"]

df3[is.na(df3)] <- 0

df3 %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Reads Oligotypes Samples
Culex pipiens 1322837 34 41
Field - Bosc 257784 26 10
Field - Camping Europe 770813 27 18
Laboratory - Lavar 294240 32 13
Culex quinquefasciatus 1719293 26 3
Field - Guadeloupe 1719293 26 3
Laboratory - Slab TC (Wolbachia -) 0 0 0
Aedes aegypti (Guadeloupe) 8109 30 2
Total 3050239 48 46

Save

setwd(path_tsv)
write.table(df, "1D_Counts_all.tsv", sep="\t", row.names = TRUE, col.names=NA)
write.table(df2, "1D_Counts_whole.tsv", sep="\t", row.names = TRUE, col.names=NA)
write.table(df3, "1D_Counts_ovary.tsv", sep="\t", row.names = TRUE, col.names=NA)

Change levels of factors for organ and species in ps.filter

change_organ(ps.filter)
change_species(ps.filter)

Save phyloseq object after decontam

setwd(path_rdata)
saveRDS(ps.filter, "1D_MED_phyloseq_decontam.rds")

setwd(path_plot)

tiff("1D_counts_by_sample.tiff", units="in", width=20, height=18, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
tiff("1D_counts_by_genus_V2.tiff", units="in", width=25, height=18, res=300)
p_genus
dev.off()
## quartz_off_screen 
##                 2
png("1D_counts_by_sample.png", units="in", width=20, height=18, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
png("1D_counts_by_genus_V2.png", units="in", width=25, height=18, res=300)
p_genus
dev.off()
## quartz_off_screen 
##                 2